
**********
* Readme *
**********

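* This script resamples from the original dataset and re-estimates the potential wage model and the reservation wage model on each new draw
* (the estimation itself is delegated to prog_boot_wages.do, run in parallel). It then computes the fitted values of every stored model and saves them.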


* Root folder (PATH TO BE DEFINED BY THE USER)
**********************************************
* Reset Stata's memory, then set the root of the replication package.
* Every path used below is built from ${analysis}.
clear all
global analysis "C:/***/replication_package"


* Timestamped log
*****************
* Today's date formatted as YYMMDD; used as a suffix in the log file name.
global today = strofreal(date(c(current_date), "DMY"), "%tdYYNNDD")

* Close any log left open (e.g. by an interrupted earlier run): -log using- aborts when an
* unnamed log is already open. -capture- makes this a no-op when no log is open.
cap log close
log using "${analysis}/code/logs/4_6_boot_wages_${today}.smcl", replace


***********************
* Parallel parameters *
***********************

* How many processors do I have?
parallel numprocessors

* Use all processors but one (presumably to keep one free for the rest of the system), but
* never fewer than one child process: on a single-core machine r(numprocessors) - 1 would be 0,
* which would request zero child processes and make the division used for B below return missing.
* max() ignores missing arguments, so an undetected processor count also falls back to 1.
global myprocessors = max(1, r(numprocessors) - 1)

* Set number of child processes
parallel initialize $myprocessors

* How many bootstrap rounds per processor (nb: total number of replications = B * n. child processes)
* ceil() rounds up, so the total is at least 420 and may slightly exceed it.
global B = ceil(420 / $myprocessors) // final


**************
* Covariates *
**************

* The cleaned data must be in memory so that the wildcard variable lists below can be expanded.
use "${analysis}/data/2_2_pof_clean.dta", clear
svyset psu_id [pweight = pweight], strata(strata_id) singleunit(centered) vce(linearized)

* Both regressor lists are stored as globals; they are not used again in this script
* (presumably they are read by prog_boot_wages.do -- confirm).

* Wage model: every rg_*, ae_* and region_* dummy ...
unab wage_pool : rg_* ae_* region_*
* ... minus the reference group (non white female, no education, Sao Paulo)
unab wage_base : rg_1_nonwhite_female ae_1_1 region_SP_c
global wage_covariates : list wage_pool - wage_base

* Selection correction & reservation wage estimation: extended set of covariates ...
unab full_pool : rg_* ae_* at_* fp_* n_kids n_youngs n_adults n_seniors region_*
* ... minus the reference group (non white female, no education, head w partner no kids, Sao Paulo)
unab full_base : rg_1_nonwhite_female ae_1_1 fp_h_wp_nk region_SP_c
global covariates : list full_pool - full_base


***************************************************
* Run the bootstrap routine in parallel instances *
***************************************************

* Root folder for temp files (-capture- silences any error, e.g. when the folder already exists)
cap mkdir "${analysis}/results/estimations/temp/"

* One subfolder per model: create it if needed, then remove previous estimations (.ster files),
* if any, so that only the results of the run below are read back later in this script.
foreach model in potential_wages reservation_wages {
  cap mkdir "${analysis}/results/estimations/temp/`model'/"

  local stale_files : dir "${analysis}/results/estimations/temp/`model'/" files "*.ster"
  foreach stale of local stale_files {
    erase "${analysis}/results/estimations/temp/`model'/`stale'"
  }
}

* POTENTIAL & RESERVATION WAGES: run the batch estimation routine in the child processes.
* -nodata-: the dataset currently in memory is not handed to the children.
* NOTE(review): no seed is set here; confirm that prog_boot_wages.do makes the draws reproducible.
parallel do "${analysis}/code/prog_boot_wages.do", nodata


**********************************
* Fitted values: potential wages *
**********************************

* Start again from the cleaned data, restricted to observations with an observed
* log wage income as employee and/or as own-account worker.
use "${analysis}/data/2_2_pof_clean.dta", clear
svyset psu_id [pweight = pweight], strata(strata_id) singleunit(centered) vce(linearized)
keep if !missing(ln_winc_employee) | !missing(ln_winc_oaw)

* Every .ster file found in the folder is one stored model
local wage_models : dir "${analysis}/results/estimations/temp/potential_wages/" files "*.ster"

* m numbers the models in the order the files are listed; it becomes the suffix of the new variables
local m = 1
foreach ster of local wage_models {

  display "model `m' comes from `ster'"

  * Step 1: correction term = weighted mean of exp(residual)
  estimates use "${analysis}/results/estimations/temp/potential_wages/`ster'"
  quietly {
    predictnl exp_ei = exp(ln_winc_employee - xb())  // exponentiated residuals (missing where ln_winc_employee is missing)
    mean exp_ei [pweight = pweight]
    matrix temp = e(b)
    scalar mean_exp_e = el(temp, 1, 1)               // correction term
  }

  * Step 2: -mean- has replaced the stored estimation results, so the model is loaded again before predicting
  estimates use "${analysis}/results/estimations/temp/potential_wages/`ster'"
  quietly {
    predict est_ln_wage_`m', xb                                  // fitted log wage
    predictnl est_wage_`m' = exp(xb()) * scalar(mean_exp_e)      // fitted wage level, rescaled by the correction term
    drop exp_ei                                                  // free the name for the next model
  }

  local ++m
}

************************************
* Fitted values: reservation wages *
************************************

* Restrict the sample to observations with an observed ln_winc_oaw.
* NOTE(review): this drops rows for good, so the potential-wage fitted values computed above are
* only saved for the observations that survive this filter -- confirm this is intended.
keep if !missing(ln_winc_oaw)

* Every .ster file found in the folder is one stored model
local rw_models : dir "${analysis}/results/estimations/temp/reservation_wages/" files "*.ster"

* m numbers the models in the order the files are listed; it becomes the suffix of the new variables
local m = 1
foreach ster of local rw_models {

  display "model `m' comes from `ster'"
  estimates use "${analysis}/results/estimations/temp/reservation_wages/`ster'"

  quietly {
    predict est_ln_rw_10_`m', xb               // linear prediction (log scale)
    predictnl est_rw_10_`m' = exp(xb())        // exponentiated prediction, no correction term applied
  }

  local ++m
}

* Store the fitted values
*************************

* Output columns: the individual identifier followed by the four families of fitted values.
* The same list drives both the selection and the ordering of the variables.
local fitted_vars ind_id est_ln_wage_* est_wage_* est_ln_rw_10_* est_rw_10_*
keep  `fitted_vars'
order `fitted_vars'

* Print the content of the final dataset to the log, then write it to disk
describe
save "${analysis}/data/4_6_est_boot_wages.dta", replace


* End of script
***************
* -capture-: do not fail if no log is open
cap log close